We will be using two different approaches to mapping during this lab. The first way is to use the latitude and longitude to identify position on the map. The second is to add the information to shapes in a map based on the name of the shapes (i.e. states). There is also a tool called ggmaps where you can use Google Maps to map data, but that is beyond the scope of this class.
library(tidyverse)
## -- Attaching packages ------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.5
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ---------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(mapdata)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:dplyr':
##
## intersect, setdiff, union
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(viridis)
## Loading required package: viridisLite
library(wesanderson)
Here we build a graph using all the coordinate information. This is not summarized by country so there are many points in the US for US counties.
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
rename(Long = "Long_")
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_character(),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed/1000)) +
borders("world", colour = NA, fill = "grey90") +
theme_bw() +
geom_point(shape = 21, color='purple', fill='purple', alpha = 0.5) +
labs(title = 'World COVID-19 Confirmed cases',x = '', y = '',
size="Cases (x1000))") +
theme(legend.position = "right") +
coord_fixed(ratio=1.5)
## Warning: Removed 54 rows containing missing values (geom_point).
Now we want to zoom in on the 48 US states. To do this Alaska, Hawaii and US Territories are filtered. Some US State entries have a Lat and Long of zero, so these are filtered as well. The borders() function is used to specify the areas in the map.
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-05-2020.csv")) %>%
rename(Long = "Long_") %>%
filter(Country_Region == "US") %>%
filter (!Province_State %in% c("Alaska","Hawaii", "American Samoa",
"Puerto Rico","Northern Mariana Islands",
"Virgin Islands", "Recovered", "Guam", "Grand Princess",
"District of Columbia", "Diamond Princess")) %>%
filter(Lat > 0)
## Parsed with column specification:
## cols(
## FIPS = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed/1000)) +
borders("state", colour = "black", fill = "grey90") +
theme_bw() +
geom_point(shape = 21, color='purple', fill='purple', alpha = 0.5) +
labs(title = 'COVID-19 Confirmed Cases in the US', x = '', y = '',
size="Cases (x1000))") +
theme(legend.position = "right") +
coord_fixed(ratio=1.5)
Here is a prettier version:
mybreaks <- c(1, 100, 1000, 10000, 10000)
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed)) +
borders("state", colour = "white", fill = "grey90") +
geom_point(aes(x=Long, y=Lat, size=Confirmed, color=Confirmed),stroke=F, alpha=0.7) +
scale_size_continuous(name="Cases", trans="log", range=c(1,7),
breaks=mybreaks, labels = c("1-99",
"100-999", "1,000-9,999", "10,000-99,999", "50,000+")) +
scale_color_viridis_c(option="viridis",name="Cases",
trans="log", breaks=mybreaks, labels = c("1-99",
"100-999", "1,000-9,999", "10,000-99,999", "50,000+")) +
# Cleaning up the graph
theme_void() +
guides( colour = guide_legend()) +
labs(title = "Anisa Dhana's layout for COVID-19 Confirmed Cases in the US'") +
theme(
legend.position = "bottom",
text = element_text(color = "#22211d"),
plot.background = element_rect(fill = "#ffffff", color = NA),
panel.background = element_rect(fill = "#ffffff", color = NA),
legend.background = element_rect(fill = "#ffffff", color = NA)
) +
coord_fixed(ratio=1.5)
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 40 rows containing missing values (geom_point).
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
rename(Long = "Long_") %>%
filter(Country_Region == "US") %>%
group_by(Province_State) %>%
summarize(Confirmed = sum(Confirmed)) %>%
mutate(Province_State = tolower(Province_State))
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_character(),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
# load the US map data
us <- map_data("state")
# We need to join the us map data with our daily report to make one data frame/tibble
state_join <- left_join(us, daily_report, by = c("region" = "Province_State"))
This is a bit of a digression back to Labs 3 and 4, but there are many R color palattes to choose from or you can create your own. In the above a simple gradient is used. The example from Anisa Dhana uses the viridis palatte which is designed to be perceived by viewers with common forms of colour blindness.
# plot state map
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
# Add data layer
geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
scale_fill_gradientn(colours =
wes_palette("Zissou1", 100, type = "continuous"),
trans = "log10") +
labs(title = "COVID-19 Confirmed Cases in the US'")
Now look by the counties using RColorBrewer package.
library(RColorBrewer)
# To display only colorblind-friendly brewer palettes, specify the option colorblindFriendly = TRUE as follow:
# display.brewer.all(colorblindFriendly = TRUE)
# Get and format the covid report data
report_03_27_2020 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
rename(Long = "Long_") %>%
unite(Key, Admin2, Province_State, sep = ".") %>%
group_by(Key) %>%
summarize(Confirmed = sum(Confirmed)) %>%
mutate(Key = tolower(Key))
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_character(),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
# dim(report_03_27_2020)
# get and format the map data
us <- map_data("state")
counties <- map_data("county") %>%
unite(Key, subregion, region, sep = ".", remove = FALSE)
# Join the 2 tibbles
state_join <- left_join(counties, report_03_27_2020, by = c("Key"))
# sum(is.na(state_join$Confirmed))
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
# Add data layer
borders("state", colour = "black") +
geom_polygon(data = state_join, aes(fill = Confirmed)) +
scale_fill_gradientn(colors = brewer.pal(n = 5, name = "PuRd"),
breaks = c(1, 10, 100, 1000, 10000, 100000),
trans = "log10", na.value = "White") +
ggtitle("Number of Confirmed Cases by US County") +
theme_bw()
## Warning: Transformation introduced infinite values in discrete y-axis
Just Massachusetts:
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
rename(Long = "Long_") %>%
filter(Province_State == "Massachusetts") %>%
group_by(Admin2) %>%
summarize(Confirmed = sum(Confirmed)) %>%
mutate(Admin2 = tolower(Admin2))
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_character(),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
us <- map_data("state")
ma_us <- subset(us, region == "massachusetts")
counties <- map_data("county")
ma_county <- subset(counties, region == "massachusetts")
state_join <- left_join(ma_county, daily_report, by = c("subregion" = "Admin2"))
# plot state map
ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
# Add data layer
geom_polygon(data = state_join, aes(fill = Confirmed), color = "white") +
scale_fill_gradientn(colors = brewer.pal(n = 5, name = "BuGn"),
trans = "log10") +
labs(title = "COVID-19 Confirmed Cases in Massachusetts'")
Note the cases on Nantucket and Dukes counties were reported as one value and not included on the graph. There is also an unasssigned category that includes 303 Confirmed cases as of 3/31/2020.
daily_report
## # A tibble: 14 x 2
## Admin2 Confirmed
## <chr> <dbl>
## 1 barnstable 283
## 2 berkshire 213
## 3 bristol 424
## 4 dukes and nantucket 12
## 5 essex 1039
## 6 franklin 85
## 7 hampden 546
## 8 hampshire 102
## 9 middlesex 1870
## 10 norfolk 938
## 11 plymouth 621
## 12 suffolk 1896
## 13 unassigned 270
## 14 worcester 667
plotly was introducesd in Lab 5 and is a great way to make interactive graphs with maps.
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
ggplotly(
ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
# Add data layer
geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
scale_fill_gradientn(colours =
wes_palette("Zissou1", 100, type = "continuous")) +
ggtitle("COVID-19 Cases in MA") +
# Cleaning up the graph
labs(x=NULL, y=NULL) +
theme(panel.border = element_blank()) +
theme(panel.background = element_blank()) +
theme(axis.ticks = element_blank()) +
theme(axis.text = element_blank())
)
Here is an example with the world map:
# Read in the daily report
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
rename(Long = "Long_") %>%
group_by(Country_Region) %>%
summarize(Confirmed = sum(Confirmed), Deaths = sum(Deaths))
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
# Read in the world map data
world <- as_tibble(map_data("world"))
# Check to see if there are differences in the naming of countries
setdiff(world$region, daily_report$Country_Region)
## [1] "Aruba" "Anguilla"
## [3] "American Samoa" "Antarctica"
## [5] "French Southern and Antarctic Lands" "Antigua"
## [7] "Barbuda" "Saint Barthelemy"
## [9] "Bermuda" "Ivory Coast"
## [11] "Democratic Republic of the Congo" "Republic of Congo"
## [13] "Cook Islands" "Cape Verde"
## [15] "Curacao" "Cayman Islands"
## [17] "Czech Republic" "Canary Islands"
## [19] "Falkland Islands" "Reunion"
## [21] "Mayotte" "French Guiana"
## [23] "Martinique" "Guadeloupe"
## [25] "Faroe Islands" "Micronesia"
## [27] "UK" "Guernsey"
## [29] "Greenland" "Guam"
## [31] "Heard Island" "Isle of Man"
## [33] "Cocos Islands" "Christmas Island"
## [35] "Chagos Archipelago" "Jersey"
## [37] "Siachen Glacier" "Kiribati"
## [39] "Nevis" "Saint Kitts"
## [41] "South Korea" "Saint Martin"
## [43] "Marshall Islands" "Macedonia"
## [45] "Myanmar" "Northern Mariana Islands"
## [47] "Montserrat" "New Caledonia"
## [49] "Norfolk Island" "Niue"
## [51] "Bonaire" "Sint Eustatius"
## [53] "Saba" "Nauru"
## [55] "Pitcairn Islands" "Palau"
## [57] "Puerto Rico" "North Korea"
## [59] "Madeira Islands" "Azores"
## [61] "Palestine" "French Polynesia"
## [63] "South Sandwich Islands" "South Georgia"
## [65] "Saint Helena" "Ascension Island"
## [67] "Solomon Islands" "Saint Pierre and Miquelon"
## [69] "Swaziland" "Sint Maarten"
## [71] "Turks and Caicos Islands" "Turkmenistan"
## [73] "Tonga" "Trinidad"
## [75] "Tobago" "Taiwan"
## [77] "USA" "Vatican"
## [79] "Grenadines" "Saint Vincent"
## [81] "Virgin Islands" "Vanuatu"
## [83] "Wallis and Futuna" "Samoa"
# Many of these countries are considered states or territories in the JHU covid reports, but let's fix a few of them
world <- as_tibble(map_data("world")) %>%
mutate(region = str_replace_all(region, c("USA" = "US", "Czech Republic" = "Czechia",
"Ivory Coast" = "Cote d'Ivoire", "Democratic Republic of the Congo" = "Congo (Kinshasa)",
"Republic of Congo" = "Congo (Brazzaville)")))
# Join the covid report with the map data
country_join <- left_join(world, daily_report, by = c("region" = "Country_Region"))
# Create the graph
ggplotly(
ggplot(data = world, mapping = aes(x = long, y = lat, text = region, group = group)) +
coord_fixed(1.3) +
# Add data layer
geom_polygon(data = country_join, aes(fill = Deaths), color = "black") +
scale_fill_gradientn(colours =
wes_palette("Zissou1", 100, type = "continuous")) +
labs(title = "COVID-19 Deaths'")
)
ggplotly(ggplot(daily_report, aes(x = Long, y = Lat, text = Country_Region, size = Confirmed))...)## Warning: Transformation introduced infinite values in discrete y-axis